library(ggplot2)
library(readr)
library(ggmap)
library(maps)
library(stringr)
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:ggmap':
##
## inset
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(maptools)
## Loading required package: sp
## Checking rgeos availability: TRUE
library(rgeos)
## rgeos version: 0.3-23, (SVN revision 546)
## GEOS runtime version: 3.4.2-CAPI-1.8.2 r3921
## Linking to sp version: 1.2-4
## Polygon checking: TRUE
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
library(RColorBrewer)
bil files (band interleaved by lines)hdr files (associated header file)filenames<-list.files("./data/PRISM/min_temp")
filenames<-filenames[grep(".*\\.bil$",filenames)]
filenames<-gsub("PRISM","\\./data/PRISM/min_temp/PRISM",filenames)
require(maptools)
require(raster)
mean2<-function(x){return(mean(x,na.rm=T))}
US_Counties<-readShapePoly("./data/US/US_County_2000_2004_geo")
US_Counties_Polygons<-as(US_Counties,"SpatialPolygons")
#Minimum Temperature
r<-raster(filenames[1])
z<-raster::extract(r,US_Counties_Polygons)
min_temp<-as.data.frame(cbind(rep(2000,length(US_Counties$FIPSNum)),US_Counties$FIPSNum,unlist(lapply(z,mean2))))
names(min_temp)<-c("year","fips","av")
for (i in 2:16){
r<-raster(filenames[i])
this.year<-i+1999
z<-raster::extract(r,US_Counties_Polygons)
tmp<-as.data.frame(cbind(rep(this.year,length(US_Counties$FIPSNum)),US_Counties$FIPSNum,unlist(lapply(z,mean2))))
names(tmp)<-c("year","fips","av")
min_temp<-rbind(min_temp,tmp)
}
filenames<-list.files("./data/PRISM/max_temp")
filenames<-filenames[grep(".*\\.bil$",filenames)]
filenames<-gsub("PRISM","\\./data/PRISM/max_temp/PRISM",filenames)
require(maptools)
require(raster)
mean2<-function(x){return(mean(x,na.rm=T))}
US_Counties<-readShapePoly("./data/US/US_County_2000_2004_geo")
US_Counties_Polygons<-as(US_Counties,"SpatialPolygons")
#Maximum Temperature
s<-raster(filenames[1])
y<-raster::extract(s,US_Counties_Polygons)
max_temp<-as.data.frame(cbind(rep(2000,length(US_Counties$FIPSNum)),US_Counties$FIPSNum,unlist(lapply(y,mean2))))
names(max_temp)<-c("year","fips","av")
for (i in 2:16){
s<-raster(filenames[i])
this.year<-i+1999
y<-raster::extract(s,US_Counties_Polygons)
tmp1<-as.data.frame(cbind(rep(this.year,length(US_Counties$FIPSNum)),US_Counties$FIPSNum,unlist(lapply(y,mean2))))
names(tmp1)<-c("year","fips","av")
max_temp<-rbind(max_temp,tmp1)
}
write.csv(min_temp,file="MinTemp")
write.csv(max_temp,file="MaxTemp")
#CDC data
library(readr)
ld<-read_csv("./data/CDC/ld-case-counts-by-county-00-15.csv")
## Parsed with column specification:
## cols(
## .default = col_integer(),
## STNAME = col_character(),
## CTYNAME = col_character()
## )
## See spec(...) for full column specifications.
#Census
pop<-read_csv("./data/CENSUS/county_population.csv")
## Parsed with column specification:
## cols(
## .default = col_integer(),
## fips = col_character(),
## areaname = col_character(),
## state_name = col_character(),
## county_name = col_character(),
## fipsst = col_character(),
## fipsco = col_character()
## )
## See spec(...) for full column specifications.
#Load all PRISM data
load("get_PRISM.Rda")
mintemp<-read.csv("MinTemp")
maxtemp<-read.csv("MaxTemp")
#Load saved dataframes
load("get_DATA.Rda")
#Remove X column
mintemp$X<-NULL
maxtemp$X<-NULL
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
##
## extract
library(magrittr)
library(dplyr)
pop %<>% select(fips,state_name,starts_with("pop2"))
pop %<>% gather(starts_with("pop2"),key="str_year",value="size") %>% na.omit
pop %<>% mutate(year=gsub("pop","",str_year))
pop %<>% mutate(year=as.numeric(year))
pop %<>% mutate(fips=gsub("^0","",fips))
pop %<>% mutate(fips=as.integer(fips))
pop$str_year<-NULL
names(pop)[names(pop) == 'size'] <- 'pop'
ld %<>% gather(starts_with("Cases"),key="str_year",value="cases")
ld %<>% mutate(year=gsub("Cases","",str_year))
ld %<>% mutate(year=as.numeric(year))
ld %<>% rename(state=STNAME,county=CTYNAME)
fips.builder<-function(st,ct){
if (nchar(ct)==3){
fips<-paste(as.character(st),as.character(ct),sep="") %>% as.integer
}
else if (nchar(ct)==2){
fips<-paste(as.character(st),"0",as.character(ct),sep="") %>% as.integer
}
else {
fips<-paste(as.character(st),"00",as.character(ct),sep="") %>% as.integer
}
return(fips)
}
ld %<>% rowwise() %>% mutate(fips=fips.builder(STCODE,CTYCODE))
ld %<>% select(-c(STCODE,CTYCODE,str_year))
#Join prism prcp and avtmp with ld
ld.total<-inner_join(ld,prism)
#Join with mintemp and rename av column to mintmp
ld.total<-inner_join(ld.total,mintemp)
names(ld.total)[names(ld.total) == 'av'] <- 'mintmp'
#Join with maxtemp and rename av column to maxtmp
ld.total<-inner_join(ld.total,maxtemp)
names(ld.total)[names(ld.total) == 'av'] <- 'maxtmp'
#Join with pop
ld.pop<-inner_join(ld.total,pop)
## Joining, by = c("year", "fips")
ld.pop$state_name<-NULL
## CENSES data only goes until 2014, so obs go from 49744 to 46630
#Compare countyTick and ld.total county names to ensure congruence for final merge: countyTick contains 3080 obs, so missing 29 counties, rename county and state to County and State
names(ld.total)[names(ld.total) == 'county'] <- 'County'
names(ld.total)[names(ld.total) == 'state'] <- 'State'
countyTick<-read_csv("./data/TICK/countyTick.csv")
#countyTick data frame lists only county name, ld.total lists "County" following county name
ld.total$County<-gsub(" County","",ld.total$County)
ld.total$County<-gsub(" Parish","",ld.total$County)
#Need to remove some unecessary columns, only need first 3 and tick_presence
countyTick$State_FIPS<-NULL
countyTick$County_FIPS<-NULL
countyTick$fipsname<-NULL
countyTick$polyname<-NULL
ld.total$SC<-paste(ld.total$County,ld.total$State,sep=",")
# repeats in countyTick - solution:
FIPScounter<-table(countyTick$fips)
FIPScounter<-FIPScounter[FIPScounter>1]
problemFIPS<-as.integer(names(FIPScounter))
problemFIPS.locs<-NULL
for (k in problemFIPS){
locs<-which(countyTick$fips==k)
locs<-locs[-1]
problemFIPS.locs<-c(problemFIPS.locs,locs)
}
countyTick<-countyTick[-problemFIPS.locs,]
## VA cities - TO BE REVISITED: JL "CITY PROBABLY HAS HIGH CASE NUMBERS SO DON"T THROW DATA AWAY IF WE DON"T HAVE TO"
tmp1<-subset(ld.total,year==2008 & State=="Virginia")
tmp2<-subset(countyTick,State=="Virginia")
setdiff(tmp1$County,tmp2$County)
addCity<-setdiff(tmp2$County,tmp1$County)
for (i in 1:dim(countyTick)[1]){
if (countyTick$State[i]=="Virginia"){
if (countyTick$County[i] %in% addCity){
countyTick$County[i]<-paste(countyTick$County[i],"city",sep=" ")
}
}
}
#Fixing Inconsistencies
countyTick %<>% mutate(County=str_replace_all(County,"Mountrial","Mountrail"))
countyTick %<>% mutate(County=str_replace_all(County,"Miami Dade","Miami-Dade"))
countyTick %<>% mutate(County=str_replace_all(County,"De Kalb","DeKalb"))
ld.total %<>% mutate(County=str_replace_all(County,"District of Columbia","Washington"))
countyTick %<>% mutate(County=str_replace_all(County,"De Soto","DeSoto"))
ld.total %<>% mutate(County=str_replace_all(County,"De Soto","DeSoto"))
countyTick %<>% mutate(County=str_replace_all(County,"Du Page","DuPage"))
countyTick %<>% mutate(County=str_replace_all(County,"La Salle","LaSalle"))
ld.total %<>% mutate(County=str_replace_all(County,"La Salle","LaSalle"))
countyTick %<>% mutate(County=str_replace_all(County,"Lagrange","LaGrange"))
countyTick %<>% mutate(County=str_replace_all(County,"La Porte","LaPorte"))
countyTick %<>% mutate(County=str_replace_all(County,"LaFourche","Lafourche"))
countyTick %<>% mutate(County=str_replace_all(County,"Prince Georges","Prince George's"))
countyTick %<>% mutate(County=str_replace_all(County,"Queen Annes","Queen Anne's"))
countyTick %<>% mutate(County=str_replace_all(County,"St. Marys","St. Mary's"))
countyTick %<>% mutate(County=str_replace_all(County,"Lac Qui Parle","Lac qui Parle"))
ld.total %<>% mutate(County=str_replace_all(County,"Do\U3e34613ca Ana","Dona Ana"))
countyTick %<>% mutate(County=str_replace_all(County,"La Moure","LaMoure"))
countyTick %<>% mutate(County=str_replace_all(County,"De Witt","DeWitt"))
ld.total %<>% mutate(County=str_replace_all(County,"De Witt","DeWitt"))
countyTick %<>% mutate(County=str_replace_all(County,"Fond Du Lac","Fond du Lac"))
# Adding missing counties to countyTick
countyTick.add<-rbind(countyTick,c(55078,"Menominee","Wisconsin",2))
countyTick.add<-rbind(countyTick.add,c(4012,"La Paz","Arizona",1))
countyTick.add<-rbind(countyTick.add,c(8014,"Broomfield","Colorado",1))
countyTick<-countyTick.add
countyTick$SC<-paste(countyTick$County,countyTick$State,sep=",")
ld.total$SC<-paste(ld.total$County,ld.total$State,sep=",")
# Adding missing cities to countyTick
newCity<-data.frame(fips=c(24510,29510,51510,51515,51520,51530,51540,51550,51570,51580,51590,51595,51600,51610,51620,51630,51640,51660,51670,51678,51680,51683,51685,51690,51720,51730,51735,51740,51750,51760,51770,51775,51790,51820,51830,51840),County=c("Baltimore city","St. Louis city","Alexandria city","Bedford city","Bristol city","Buena Vista city","Charlottesville city","Chesapeake city","Colonial Heights city","Covington city","Danville city","Emporia city","Fairfax city","Falls Church city","Franklin city","Fredericksburg city","Galax city","Harrisonburg city","Hopewell city","Lexington city","Lynchburg city","Manassas city","Manassas Park city","Martinsville city","Norton city","Petersburg city","Poquoson city","Portsmouth city","Radford city","Richmond city","Roanoke city","Salem city","Staunton city","Waynesboro city","Williamsburg city","Winchester city"),State=c("Maryland","Missouri","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia"),Tick_presence=c(2,1,2,1,1,2,2,2,2,1,2,1,1,1,2,2,2,2,2,2,2,2,2,1,1,1,2,1,2,2,2,2,2,2,1,2))
newCity$SC<-paste(newCity$County,newCity$State,sep=",")
countyTick<-rbind(newCity,countyTick)
setdiff(ld.total$County,countyTick$County)
# No differences in dataframes
countyTick$fips<-as.numeric(countyTick$fips)
countyTick$County<-as.character(countyTick$County)
countyTick$State<-as.character(countyTick$State)
countyTick$Tick_presence<-as.numeric(countyTick$Tick_presence)
ld.tick<-inner_join(ld.total,countyTick)
ld.tick$Presence_current<-ld.tick$Tick_presence
ld.tick$Presence_current<-ifelse(ld.tick$Presence_current%in%c(1,3),0,1)
ld.tick %>% filter(cases>0) %>% ggplot(aes(x=as.factor(Presence_current),y=cases))+geom_boxplot()+scale_y_log10()
county_map<-map_data("county")
county_map %<>% mutate(region=str_to_title(region))
county_map %<>% mutate(subregion=str_to_title(subregion))
county_map %<>% mutate(subregion=str_replace_all(subregion,"De Kalb","DeKalb"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Clair","St. Clair"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Francis","St. Francis"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"De Soto","DeSoto"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Johns","St. Johns"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Lucie","St. Lucie"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcduffie","McDuffie"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcintosh","McIntosh"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"La Salle","LaSalle"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcdonough","McDonough"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mchenry","McHenry"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mclean","McLean"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Lagrange","LaGrange"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"La Porte","LaPorte"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Joseph","St. Joseph"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Obrien","O'Brien"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcpherson","McPherson"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccracken","McCracken"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccreary","McCreary"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Bernard","St. Bernard"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Charles","St. Charles"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Helena","St. Helena"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St James","St. James"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St John The Baptist","St. John the Baptist"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Landry","St. Landry"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Martin","St. Martin"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Mary","St. Mary"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Tammany","St. Tammany"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Prince Georges","Prince George's"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Queen Annes","Queen Anne's"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St. Marys","St. Mary's"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Baltimore City","Baltimore city"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Lac Qui Parle","Lac qui Parle"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Lake Of The Woods","Lake of the Woods"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcleod","McLeod"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Louis","St. Louis"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcdonald","McDonald"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Francois","St. Francois"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St. Louis City","St. Louis city"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Ste Genevieve","Ste. Genevieve"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Lewis And Clark","Lewis and Clark"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccone","McCone"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Yellowstone National","Yellowstone"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mckinley","McKinley"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Lawrence","St. Lawrence"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcdowell","McDowell"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"La Moure","LaMoure"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mckenzie","McKenzie"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcclain","McClain"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccurtain","McCurtain"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mckean","McKean"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccormick","McCormick"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccook","McCook"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcminn","McMinn"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcnairy","McNairy"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcculloch","McCulloch"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mclennan","McLennan"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcmullen","McMullen"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Isle Of Wight","Isle of Wight"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"King And Queen","King and Queen"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Newport News","Newport News city"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Virginia Beach","Virginia Beach city"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Fond Du Lac","Fond du Lac"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Croix","St. Croix"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"De Witt","DeWitt"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Du Page","DuPage"))
#Changing Virginia cities to associated counties
ld.tick.county<-ld.tick
ld.tick.county%<>% mutate(County=str_replace_all(County,"Alexandria city","Fairfax"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Bedford city","Bedford"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Bristol city","Washington"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Buena Vista city","Rockbridge"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Charlottesville city","Albemarle"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Chesapeake city","Norfolk"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Colonial Heights city","Dinwiddie"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Covington city","Alleghany"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Danville city","Pittsylvania"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Emporia city","Greensville"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Fairfax city","Fairfax"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Falls Church city","Fairfax"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Franklin city","Accomack"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Fredericksburg city","Spotsylvania"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Galax city","Carroll"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Hampton city","Hampton"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Harrisonburg city","Rockingham"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Hopewell city","Prince George"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Lexington city","Rockbridge"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Lynchburg city","Campbell"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Manassas city","Prince William"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Manassas Park city","Prince William"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Martinsville city","Henry"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Norfolk city","Norfolk"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Norton city","Wise"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Petersburg city","Dinwiddie"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Poquoson city","York"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Portsmouth city","Norfolk"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Radford city","Montgomery"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Richmond city","Henrico"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Roanoke city","Roanoke"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Salem city","Roanoke"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Staunton city","Augusta"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Suffolk city","Suffolk"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Waynesboro city","Augusta"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Williamsburg city","James City"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Winchester city","Frederick"))
ld.tick.county$SC<-paste(ld.tick.county$County,ld.tick.county$State,sep=",")
county_map %<>% mutate(SC=paste(subregion,region,sep=","))
setdiff(county_map$subregion,ld.tick.county$County)
setdiff(ld.tick.county$County,county_map$subregion)
ld.av.edit<-aggregate(ld.tick.county$cases,by=list(ld.tick.county$SC,ld.tick.county$Presence_current),FUN=mean)
names(ld.av.edit)<-c("SC","Presence_current","av.cases")
tmp1<-inner_join(county_map,ld.av.edit)
## Joining, by = "SC"
case.0<-subset(tmp1,Presence_current==0)
case.2<-subset(case.0,av.cases>2)
case.1<-subset(case.0,av.cases>1)
case.1<-unique(case.1)
case.2<-unique(case.2)
#[1] "Maricopa,Arizona" "Pima,Arizona"
#[3] "Alameda,California" "Contra Costa,California"
#[5] "Humboldt,California" "Los Angeles,California"
#[7] "Marin,California" "Mendocino,California"
#[9] "Nevada,California" "Riverside,California"
#[11] "San Diego,California" "San Francisco,California"
#[13] "San Mateo,California" "Santa Barbara,California"
#[15] "Santa Clara,California" "Santa Cruz,California"
#[17] "Sonoma,California" "Marion,Indiana"
#[19] "Starke,Indiana" "Johnson,Kansas"
#[21] "Allegan,Michigan" "Kent,Michigan"
#[23] "Ottawa,Michigan" "Washtenaw,Michigan"
#[25] "Clearwater,Minnesota" "Lake,Minnesota"
#[27] "Polk,Minnesota" "Douglas,Nebraska"
#[29] "Lancaster,Nebraska" "Clark,Nevada"
#[31] "Washoe,Nevada" "Cass,North Dakota"
#[33] "Grand Forks,North Dakota" "Delaware,Ohio"
#[35] "Franklin,Ohio" "Hamilton,Ohio"
#[37] "Montgomery,Ohio" "Douglas,Oregon"
#[39] "Jackson,Oregon" "Josephine,Oregon"
#[41] "Multnomah,Oregon" "Allegheny,Pennsylvania"
#[43] "Armstrong,Pennsylvania" "Beaver,Pennsylvania"
#[45] "Butler,Pennsylvania" "Clarion,Pennsylvania"
#[47] "Fayette,Pennsylvania" "Indiana,Pennsylvania"
#[49] "Lawrence,Pennsylvania" "Mercer,Pennsylvania"
#[51] "Somerset,Pennsylvania" "Washington,Pennsylvania"
#[53] "Westmoreland,Pennsylvania" "Brown,Texas"
#[55] "Frederick,Virginia" "James City,Virginia"
#[57] "Pulaski,Virginia" "Rockbridge,Virginia"
#[59] "Shenandoah,Virginia" "Warren,Virginia"
#[61] "King,Washington"
ggplot(tmp1,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(tmp1$Presence_current==0,"red","black"),fill=ifelse(tmp1$av.cases>2,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)#Counties outlined in red and filled with gold have 0 tick presence but av.cases>2
tmp<-aggregate(ld.tick.county$prcp,by=list(ld.tick.county$SC,ld.tick.county$Presence_current,ld.tick.county$year,ld.tick.county$cases),FUN=mean)
names(tmp)<-c("SC","Presence_current","year","cases","av.prcp")
#2000: Washington Oregon, northeast, Florida and Texas area lacking cases
tmp2000<-subset(tmp,year==2000)
map.2000<-inner_join(tmp2000,county_map)
## Joining, by = "SC"
ggplot(map.2000,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2000$Presence_current==0,"red","black"),fill=ifelse(map.2000$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#2001: some case expansion near Oregon and Texas
tmp2001<-subset(tmp,year==2001)
map.2001<-inner_join(tmp2001,county_map)
## Joining, by = "SC"
ggplot(map.2001,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2001$Presence_current==0,"red","black"),fill=ifelse(map.2001$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#2002: same as 2001 with some Northeast expansion
tmp2002<-subset(tmp,year==2002)
map.2002<-inner_join(tmp2002,county_map)
## Joining, by = "SC"
ggplot(map.2002,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2002$Presence_current==0,"red","black"),fill=ifelse(map.2002$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#2003: Northeast expansion W-O regression
tmp2003<-subset(tmp,year==2003)
map.2003<-inner_join(tmp2003,county_map)
## Joining, by = "SC"
ggplot(map.2003,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2003$Presence_current==0,"red","black"),fill=ifelse(map.2003$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#2004: California and north (Michigan) expansion
tmp2004<-subset(tmp,year==2004)
map.2004<-inner_join(tmp2004,county_map)
## Joining, by = "SC"
ggplot(map.2004,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2004$Presence_current==0,"red","black"),fill=ifelse(map.2004$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#2005: same as 2004 except loss of one midwwest county and Texas expansion**
tmp2005<-subset(tmp,year==2005)
map.2005<-inner_join(tmp2005,county_map)
## Joining, by = "SC"
ggplot(map.2005,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2005$Presence_current==0,"red","black"),fill=ifelse(map.2005$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#2006: Texas, northern, and California regression**
tmp2006<-subset(tmp,year==2006)
map.2006<-inner_join(tmp2006,county_map)
## Joining, by = "SC"
ggplot(map.2006,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2006$Presence_current==0,"red","black"),fill=ifelse(map.2006$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#2007: Still no Texas, Southwest expansion
tmp2007<-subset(tmp,year==2007)
map.2007<-inner_join(tmp2007,county_map)
## Joining, by = "SC"
ggplot(map.2007,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2007$Presence_current==0,"red","black"),fill=ifelse(map.2007$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#2008: Texas expansion, a lot of northwest expansion**
tmp2008<-subset(tmp,year==2008)
map.2008<-inner_join(tmp2008,county_map)
## Joining, by = "SC"
ggplot(map.2008,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2008$Presence_current==0,"red","black"),fill=ifelse(map.2008$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#2009: Southern California expasion, Texas maintanence**
tmp2009<-subset(tmp,year==2009)
map.2009<-inner_join(tmp2009,county_map)
## Joining, by = "SC"
ggplot(map.2009,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2009$Presence_current==0,"red","black"),fill=ifelse(map.2009$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#2010: Southern Cali regression
tmp2010<-subset(tmp,year==2010)
map.2010<-inner_join(tmp2010,county_map)
## Joining, by = "SC"
ggplot(map.2010,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2010$Presence_current==0,"red","black"),fill=ifelse(map.2010$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#2011: Texas regression, Southern Cali and W-O expansion
tmp2011<-subset(tmp,year==2011)
map.2011<-inner_join(tmp2011,county_map)
## Joining, by = "SC"
ggplot(map.2011,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2011$Presence_current==0,"red","black"),fill=ifelse(map.2011$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#2012: Northwest and Southern Cali regression
tmp2012<-subset(tmp,year==2012)
map.2012<-inner_join(tmp2012,county_map)
## Joining, by = "SC"
ggplot(map.2012,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2012$Presence_current==0,"red","black"),fill=ifelse(map.2012$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#2013: A lot of mid west expansion**
tmp2013<-subset(tmp,year==2013)
map.2013<-inner_join(tmp2013,county_map)
## Joining, by = "SC"
ggplot(map.2013,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2013$Presence_current==0,"red","black"),fill=ifelse(map.2013$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#2014: midwest regression
tmp2014<-subset(tmp,year==2014)
map.2014<-inner_join(tmp2014,county_map)
## Joining, by = "SC"
ggplot(map.2014,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2014$Presence_current==0,"red","black"),fill=ifelse(map.2014$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#2015: same as 2014
tmp2015<-subset(tmp,year==2015)
map.2015<-inner_join(tmp2015,county_map)
## Joining, by = "SC"
ggplot(map.2015,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2015$Presence_current==0,"red","black"),fill=ifelse(map.2015$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
cnames.1 <- aggregate(cbind(long, lat) ~ SC, data=case.1, FUN=function(x) mean(range(x)))
cnames.2 <- aggregate(cbind(long, lat) ~ SC, data=case.2, FUN=function(x) mean(range(x)))
ggplot(map.2000,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2000$SC %in% cnames.1$SC,"red","black"),fill=ifelse(map.2000$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)#If outlined in red than av.case>1 and tick presence=0
Important year transitions: 2005(cases)-2006(loss of cases) 2008-2009: big case years all around *2013: Midwest cases
#2005-2006 case comparison: 1014.887 and 1035.723 are mean of av.prcp for given year; prcp goes below avg in S.Cali & Midwest but above average in Michigan; precipitation influence only in northeast?
ggplot(map.2005,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2005$SC %in% cnames.1$SC,"red","gray40"),fill=ifelse(map.2005$av.prcp>1014.887,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
ggplot(map.2006,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2006$SC %in% cnames.1$SC,"red","gray40"),fill=ifelse(map.2006$av.prcp>1035.723,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#2008-2009: Rainy northeast midwest, dry California
ggplot(map.2008,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2008$SC %in% cnames.1$SC,"red","gray40"),fill=ifelse(map.2008$av.prcp>1075.407,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
ggplot(map.2009,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2009$SC %in% cnames.1$SC,"red","gray40"),fill=ifelse(map.2009$av.prcp>1178.298,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#2013: Dry year northwest, rainy for some of midwest (less than 2008-2009)
ggplot(map.2013,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2013$SC %in% cnames.1$SC,"red","gray40"),fill=ifelse(map.2013$av.prcp>1132.778,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)
#County Classifications
class<-read.csv("~/lyme_modeling/data/CENSUS/County Classifications.csv")
#Income
income<-read.csv("~/lyme_modeling/data/CENSUS/Income.csv")
#Jobs
jobs<-read.csv("~/lyme_modeling/data/CENSUS/Jobs.csv")
#Deer
deer<-read.csv("~/lyme_modeling/data/DEER/deer_density.csv")
jobs %<>% gather(starts_with("UnempRate"),key="str_year",value="UnemploymentRate")
jobs %<>% mutate(year=gsub("UnempRate","",str_year))
jobs %<>% mutate(year=as.numeric(year))
jobs %<>% select(c(FIPS,State,County,UnemploymentRate,year))
jobs%<>% mutate(State=str_replace_all(State,"AL","Alabama"))
jobs%<>% mutate(State=str_replace_all(State,"AR","Arkansas"))
jobs%<>% mutate(State=str_replace_all(State,"AZ","Arizona"))
jobs%<>% mutate(State=str_replace_all(State,"CA","California"))
jobs%<>% mutate(State=str_replace_all(State,"CO","Colorado"))
jobs%<>% mutate(State=str_replace_all(State,"CT","Connecticut"))
jobs%<>% mutate(State=str_replace_all(State,"DE","Delaware"))
jobs%<>% mutate(State=str_replace_all(State,"FL","Florida"))
jobs%<>% mutate(State=str_replace_all(State,"GA","Georgia"))
jobs%<>% mutate(State=str_replace_all(State,"ID","Idaho"))
jobs%<>% mutate(State=str_replace_all(State,"IL","Illinois"))
jobs%<>% mutate(State=str_replace_all(State,"IN","Indiana"))
jobs%<>% mutate(State=str_replace_all(State,"IA","Iowa"))
jobs%<>% mutate(State=str_replace_all(State,"KS","Kansas"))
jobs%<>% mutate(State=str_replace_all(State,"KY","Kentucky"))
jobs%<>% mutate(State=str_replace_all(State,"LA","Louisiana"))
jobs%<>% mutate(State=str_replace_all(State,"ME","Maine"))
jobs%<>% mutate(State=str_replace_all(State,"MD","Maryland"))
jobs%<>% mutate(State=str_replace_all(State,"MA","Massachusetts"))
jobs%<>% mutate(State=str_replace_all(State,"MI","Michigan"))
jobs%<>% mutate(State=str_replace_all(State,"MN","Minnesota"))
jobs%<>% mutate(State=str_replace_all(State,"MS","Mississippi"))
jobs%<>% mutate(State=str_replace_all(State,"MO","Missouri"))
jobs%<>% mutate(State=str_replace_all(State,"MT","Montana"))
jobs%<>% mutate(State=str_replace_all(State,"NE","Nebraska"))
jobs%<>% mutate(State=str_replace_all(State,"NV","Nevada"))
jobs%<>% mutate(State=str_replace_all(State,"NH","New Hampshire"))
jobs%<>% mutate(State=str_replace_all(State,"NJ","New Jersey"))
jobs%<>% mutate(State=str_replace_all(State,"NM","New Mexico"))
jobs%<>% mutate(State=str_replace_all(State,"NY","New York"))
jobs%<>% mutate(State=str_replace_all(State,"NC","North Carolina"))
jobs%<>% mutate(State=str_replace_all(State,"ND","North Dakota"))
jobs%<>% mutate(State=str_replace_all(State,"OH","Ohio"))
jobs%<>% mutate(State=str_replace_all(State,"OK","Oklahoma"))
jobs%<>% mutate(State=str_replace_all(State,"OR","Oregon"))
jobs%<>% mutate(State=str_replace_all(State,"PA","Pennsylvania"))
jobs%<>% mutate(State=str_replace_all(State,"RI","Rhode Island"))
jobs%<>% mutate(State=str_replace_all(State,"SC","South Carolina"))
jobs%<>% mutate(State=str_replace_all(State,"SD","South Dakota"))
jobs%<>% mutate(State=str_replace_all(State,"TN","Tennessee"))
jobs%<>% mutate(State=str_replace_all(State,"TX","Texas"))
jobs%<>% mutate(State=str_replace_all(State,"UT","Utah"))
jobs%<>% mutate(State=str_replace_all(State,"VT","Vermont"))
jobs%<>% mutate(State=str_replace_all(State,"VA","Virginia"))
jobs%<>% mutate(State=str_replace_all(State,"WA","Washington"))
jobs%<>% mutate(State=str_replace_all(State,"WV","West Virginia"))
jobs%<>% mutate(State=str_replace_all(State,"WI","Wisconsin"))
jobs%<>% mutate(State=str_replace_all(State,"WY","Wyoming"))
jobs$SC<-paste(jobs$County,jobs$State,sep=",")
jobs$FIPS<-as.numeric(jobs$FIPS)
names(jobs)[names(jobs) == 'FIPS'] <- 'fips'
jobs_map<-inner_join(ld.tick.county,jobs)
## Joining, by = c("State", "County", "year", "fips", "SC")
## Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## character vector and factor, coercing into character vector
jobs_map<-jobs_map %>% filter (Presence_current==1)
#Deer
deer<-read.csv("~/lyme_modeling/data/DEER/deer_density.csv")
deer$CntyName<-gsub(" County","",deer$CntyName)
names(deer)[names(deer)=='StateName']<-'State'
names(deer)[names(deer)=='CntyName']<-'County'
names(deer)[names(deer) == 'FIPS'] <- 'fips'
deer$SC<-paste(deer$County,deer$State,sep=",")
ld.pres.total<-inner_join(deer,ld.tick.county)
## Joining, by = c("fips", "State", "County", "SC")
## Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector
ld.jobs.total<-inner_join(deer,jobs_map)
## Joining, by = c("fips", "State", "County", "SC")
## Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector
#Building dataframe of average unemployment rate, average cases, total cases, and total population per state 2007-2015
u<- jobs_map %>% filter(Presence_current==1) %>% group_by(State,year) %>% summarize(av.UR=mean(UnemploymentRate),av.cases=mean(cases),tot.cases=sum(cases))
## Warning: Grouping rowwise data frame strips rowwise nature
#More than 250 cases
u250up <- u %>% filter(tot.cases>=250)
ggplot(u,aes(x=av.UR,y=tot.cases,col=as.factor(year)))+geom_point()
#Adding in population
z <- pop %>% group_by(state_name,year) %>% summarize(tot.pop=sum(pop))
z %<>% rename(State=state_name)
z <- inner_join(z,u)
## Joining, by = c("State", "year")
z %<>% mutate(incidence=(100000*(tot.cases/tot.pop)))
z_big <- z %>% filter(incidence>=5)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
#All of average cases per sets of 5 States
#"Alabama","Arkansas","Connecticut","Delaware","Florida"
ggplot(subset(u,State %in% c("Alabama","Arkansas","Connecticut","Delaware","Florida")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))
#"Gerogia","Illinois","Iowa","Louisiana","Maine"
ggplot(subset(u,State %in% c("Gerogia","Illinois","Iowa","Louisiana","Maine")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))
#"Maryland","Massachusetts","Michigan","Minnesota","Mississippi"
ggplot(subset(u,State %in% c("Maryland","Massachusetts","Michigan","Minnesota","Mississippi")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))
#"Missouri","Nebraska","New Hampshire","New Jesey","New York"
ggplot(subset(u,State %in% c("Missouri","Nebraska","New Hampshire","New Jesey","New York")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))
#"North Carolina","North Dakota","Ohio","Oklahoma","Pennsylvania"
ggplot(subset(u,State %in% c("North Carolina","North Dakota","Ohio","Oklahoma","Pennsylvania")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))
#"Rhode Island","South Carolina","South Dakota","Tennessee","Texas"
ggplot(subset(u,State %in% c("Rhode Island","South Carolina","South Dakota","Tennessee","Texas")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))
#"Vermont","Virginia","West Virginia","Wisconsin"
ggplot(subset(u,State %in% c("Vermont","Virginia","West Virginia","Wisconsin")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))
#All 50 states Unemployment Rate
ggplot(u,aes(x=year,y=av.UR,group=State,colour=State))+geom_line()
#Subsets of 5
#"Alabama","Arkansas","Connecticut","Delaware","Florida"
ggplot(subset(u,State %in% c("Alabama","Arkansas","Connecticut","Delaware","Florida")))+geom_line(aes(year,av.UR,group=State,colour=State))
ggplot(subset(u, State %in% c("Alabama","Arkansas","Connecticut","Delaware","Florida")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))
#"Gerogia","Illinois","Iowa","Louisiana","Maine"
ggplot(subset(u,State %in% c("Gerogia","Illinois","Iowa","Louisiana","Maine")))+geom_line(aes(year,av.UR,group=State,colour=State))
ggplot(subset(u,State %in% c("Gerogia","Illinois","Iowa","Louisiana","Maine")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))
#"Maryland","Massachusetts","Michigan","Minnesota","Mississippi"
ggplot(subset(u,State %in% c("Maryland","Massachusetts","Michigan","Minnesota","Mississippi")))+geom_line(aes(year,av.UR,group=State,colour=State))
ggplot(subset(u,State %in% c("Maryland","Massachusetts","Michigan","Minnesota","Mississippi")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))
#"Missouri","Nebraska","New Hampshire","New Jesey","New York"
ggplot(subset(u,State %in% c("Missouri","Nebraska","New Hampshire","New Jesey","New York")))+geom_line(aes(year,av.UR,group=State,colour=State))
ggplot(subset(u,State %in% c("Missouri","Nebraska","New Hampshire","New Jesey","New York")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))
#"North Carolina","North Dakota","Ohio","Oklahoma","Pennsylvania"
ggplot(subset(u,State %in% c("North Carolina","North Dakota","Ohio","Oklahoma","Pennsylvania")))+geom_line(aes(year,av.UR,group=State,colour=State))
ggplot(subset(u,State %in% c("North Carolina","North Dakota","Ohio","Oklahoma","Pennsylvania")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))
#"Rhode Island","South Carolina","South Dakota","Tennessee","Texas"
ggplot(subset(u,State %in% c("Rhode Island","South Carolina","South Dakota","Tennessee","Texas")))+geom_line(aes(year,av.UR,group=State,colour=State))
ggplot(subset(u,State %in% c("Rhode Island","South Carolina","South Dakota","Tennessee","Texas")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))
#"Vermont","Virginia","West Virginia","Wisconsin"
ggplot(subset(u,State %in% c("Vermont","Virginia","West Virginia","Wisconsin")))+geom_line(aes(year,av.UR,group=State,colour=State))
ggplot(subset(u,State %in% c("Vermont","Virginia","West Virginia","Wisconsin")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))
ggplot(ld.pres.total,aes(x=log10(prcp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2000,stat=identity))
ggplot(ld.pres.total,aes(x=log10(prcp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2008,stat=identity))
ggplot(ld.pres.total,aes(x=log10(prcp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2009,stat=identity))+geom_smooth(method="lm")
#Deer
ggplot(ld.pres.total,aes(x=(DeerDensityMax),y=log10(cases),group=DeerDensityMax))+geom_boxplot()
## Warning: Removed 34875 rows containing non-finite values (stat_boxplot).
ggplot(ld.pres.total,aes(x=(DeerDensityMode),y=log10(cases),group=DeerDensityMode))+geom_boxplot()
## Warning: Removed 34875 rows containing non-finite values (stat_boxplot).
#Population: As population increase so do cases
ld.pop.edit<-inner_join(ld.pop,ld.pres.total)
## Joining, by = c("State", "County", "cases", "year", "fips", "prcp", "avtemp", "mintmp", "maxtmp", "SC")
ggplot(ld.pop.edit,aes(x=log10(pop),y=log10(cases+1)))+geom_point(data=subset(ld.pop.edit,year==2005,stat=identity))+geom_smooth(method="lm")
ggplot(ld.pop.edit,aes(x=log10(pop),y=log10(cases+1)))+geom_point(data=subset(ld.pop.edit,year==2009,stat=identity))+geom_smooth(method="lm")
ggplot(ld.pop.edit,aes(x=log10(pop),y=log10(cases+1)))+geom_point(data=subset(ld.pop.edit,year==2008,stat=identity))+geom_smooth(method="lm")
#Temperature
ggplot(ld.pres.total,aes(x=log10(avtemp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2009,stat=identity))+geom_smooth(method="lm")
ggplot(ld.pres.total,aes(x=log10(avtemp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2008,stat=identity))+geom_smooth(method="lm")
ggplot(ld.pres.total,aes(x=log10(avtemp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2005,stat=identity))+geom_smooth(method="lm")
ggplot(ld.pres.total,aes(x=log10(avtemp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2006,stat=identity))+geom_smooth(method="lm")
#Unemployment Rate
ggplot(z_big,aes(x=av.UR,y=incidence))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")
x<-ld.pop.edit %>% group_by(State,year) %>% summarize(tot.cases=sum(cases),av.prcp=mean(prcp),av.temp=mean(avtemp),min.temp=mean(mintmp),max.temp=mean(maxtmp),tot.pop=sum(pop),av.deermax=mean(DeerDensityMax),av.deermode=mean(DeerDensityMode)) %>% mutate(incidence=(100000*(tot.cases/tot.pop)))
## Warning: Grouping rowwise data frame strips rowwise nature
x_big<- x %>% filter(incidence>=5)
#Precipitation: more precipitation, higher incidence; 2012-2013 have mid level rainfall with highest incidence--> result of other variable?
ggplot(x_big,aes(x=av.prcp,y=incidence))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")
ggplot(x_big,aes(x=av.prcp,y=incidence))+geom_point(data=subset(x_big,year==2012),aes(col=as.factor(State)))+geom_smooth(method=lm)
#Temperature: average max temp has most negative relationship; higher the maximum temperature the lower the incidence
ggplot(x_big,aes(x=av.temp,y=incidence))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")
ggplot(x_big,aes(x=min.temp,y=incidence))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")
ggplot(x_big,aes(x=max.temp,y=incidence))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")
#Deer
ggplot(data=subset(x_big,year==2000:2007),aes(av.deermax,incidence))+geom_boxplot(aes(col=as.factor(year)))
## Warning in year == 2000:2007: longer object length is not a multiple of
## shorter object length
## Warning: position_dodge requires non-overlapping x intervals
## GGPairs data visualization: from lyme_modeling example
library(GGally)
##
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##
## nasa
#Big picture correlations
ggpairs(ld.jobs.total,columns = c("cases","prcp","avtemp","UnemploymentRate"))
#Combine all thinned out dataframes
all_av<-inner_join(x,z)
## Joining, by = c("State", "year", "tot.cases", "tot.pop", "incidence")
ggpairs(x,columns=c("tot.cases","av.prcp","av.temp","tot.pop"))
x %<>% mutate(log10totpop=log10(tot.pop))
x %<>% mutate(log10totcases=log10(tot.cases + 1))
ggpairs(x,columns=c("log10totpop","log10totcases","av.prcp","av.temp"))
all_av%<>% mutate(log10totpop=log10(tot.pop))
all_av%<>% mutate(log10totcases=log10(tot.cases + 1))
all_av %<>% mutate(log10in=log10(incidence+1))
ggpairs(all_av,columns=c("log10in","av.prcp","av.UR","av.temp"))
lm_UR_in<-lm(av.UR ~ log10in, data=all_av)
summary(lm_UR_in)
##
## Call:
## lm(formula = av.UR ~ log10in, data = all_av)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.226 -1.116 -0.048 1.312 3.400
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.0279 0.2722 36.842 < 2e-16 ***
## log10in -1.5061 0.2060 -7.311 1.35e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.545 on 85 degrees of freedom
## Multiple R-squared: 0.3861, Adjusted R-squared: 0.3788
## F-statistic: 53.45 on 1 and 85 DF, p-value: 1.351e-10
#The modelr package: nesting method of organization is useful for explanatory modeling
by_state<-ld.pop.edit %>% group_by(State)
## Warning: Grouping rowwise data frame strips rowwise nature
by_state %<>% nest
#Write a function that takes a data frame as its argument and returns a liner model object that predicts size by year
linGrowth_model<-function(df){
lm(pop ~ year, data = df)
}
#State-wise statistical modeling exercise
models<-purrr::map(by_state$data,linGrowth_model)
#Add a column to the by_state df, where each row(State) has its own model object
by_state%<>%mutate(model=purrr::map(data,linGrowth_model))
library(modelr)
by_state%<>%mutate(resids=purrr::map2(data,model,add_residuals))
#Write a function that accepts an obj of the type in the resids list and returns a sum of the absolute values
sum_resids<-function(x){
sum(abs(x$resid))
}
by_state%<>%mutate(totalResid=purrr::map(resids,sum_resids))
#Write a function that accepts a linear model and returns the slope
get_slope<-function(model){
model$coefficients[2]
}
by_state%<>%mutate(slope=purrr::map(model,get_slope))
#Un-nest residual and slope column so not in list format
slopes<-unnest(by_state,slope)
totalResids<-unnest(by_state,totalResid)
#Plot growth rate for all states
slopes%>%ggplot(aes(State,slope))+geom_point()+theme(axis.text.x = element_text(angle=90,hjust=1))
#Plot total residuals for all states
totalResids%>%ggplot(aes(State,totalResid))+geom_point()+theme(axis.text.x=element_text(angle=90,hjust=1))
##Use different data frame name
by_state2 <- ld.pop.edit%>%group_by(State)%>%nest
## Warning: Grouping rowwise data frame strips rowwise nature
#Write a function that accepts an element of the by_state$data list-column and returns the spearman correlation coefficient between Lyme disease cases and precipitation
runCor<-function(df){
suppressWarnings(cor.test(df$cases,df$prcp,method="spearman")$estimate)
}
by_state2%<>%mutate(spCor=purrr::map(data,runCor))
spCors<-unnest(by_state2,spCor)
spCors%<>%arrange(desc(spCor))
spCors$State<-factor(spCors$State,levels=unique(spCors$State))
ggplot(spCors,aes(State,spCor))+geom_point()+theme(axis.text.x = element_text(angle=90,hjust=1))